package edu.ycu.aladdin.collector.weibo;

import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Page processor that pulls the embedded render-data JSON out of a Weibo page.
 *
 * <p>The second {@code <script>} element of the page is scanned for the
 * {@code START_SIGN} and {@code END_SIGN} markers; the text between them (minus a fixed
 * trailer) is written to the log. Nothing is stored on the {@link Page} yet.
 *
 * @author <a href="mailto:wangruihuan@gmail.com">王瑞环</a>
 * @since 1.0
 */
// TODO to be completed
@Slf4j
@Component
@Deprecated
public abstract class WeiBoContentProcessor implements PageProcessor {

    /** Text that immediately precedes the payload inside the script. */
    private static final String START_SIGN = "var $render_data = [";

    /** Text searched for after the payload; extraction stops shortly before it. */
    private static final String END_SIGN = "var __wb_performance_data";

    /** Zero-based position of the {@code <script>} element that is scanned. */
    private static final int SCRIPT_INDEX = 1;

    /**
     * Number of characters dropped directly in front of {@code END_SIGN}.
     * NOTE(review): presumably covers the array-closing trailer and the whitespace that
     * separate the JSON from the next statement -- confirm against a live page.
     */
    private static final int END_TRIM = 14;

    /**
     * Extracts the render-data text from the page's script and logs it.
     *
     * <p>When the expected script element or either marker is missing, a warning is
     * logged and the method returns without extracting anything.
     *
     * @param page the downloaded page to inspect
     */
    @Override
    public void process(Page page) {
        // Stream over the script elements so a page with too few of them yields null
        // instead of an IndexOutOfBoundsException.
        final String script = page.getHtml().getDocument().getElementsByTag("script").stream()
                .skip(SCRIPT_INDEX)
                .findFirst()
                .map(element -> element.data())
                .orElse(null);
        if (script == null) {
            log.warn("Expected at least {} script elements; render data not extracted",
                    SCRIPT_INDEX + 1);
            return;
        }

        final int start = script.indexOf(START_SIGN);
        if (start < 0) {
            log.warn("Start marker '{}' not found; render data not extracted", START_SIGN);
            return;
        }

        // Look for the end marker only behind the start marker so that an earlier,
        // unrelated occurrence cannot produce an inverted range.
        final int from = start + START_SIGN.length();
        final int end = script.indexOf(END_SIGN, from);
        log.debug("Render data markers: start={}, end={}", start, end);
        if (end < 0) {
            log.warn("End marker '{}' not found; render data not extracted", END_SIGN);
            return;
        }

        final int to = end - END_TRIM;
        if (to < from) {
            // The markers are closer together than the trailer that would be trimmed.
            log.warn("Render data range is empty (from={}, to={}); nothing extracted", from, to);
            return;
        }

        log.info("JSON:{}", script.substring(from, to));
    }
}
